To see how we got here, check the notebooks covering the loading of the GFS atmospheric forecast data, the buoy data, the platform positions, and the glider positions, as well as the evaluation of the slab-ocean model.
Details: https://github.com/willirath/nia-prediction-low-latitudes/
# parameters

# regional coverage (degrees)
# NOTE: lon_min > lon_max means the longitude window wraps across the
# 0/360 meridian (310E..360E plus 0E..20E); the filters further down
# handle this wrapped case explicitly.
lat_min, lat_max = -20, 30
lon_min, lon_max = 310, 20
# how far into the forecast the GFS data is considered reliable
good_forecast_days = 7 # days
# Platforms, drifting buoys, and gliders to be plotted
# (IDs as used in the downloaded data files)
selected_platforms = ['SO']
selected_buoys = [300034013902340]
selected_glider = ['ifm14']
# drop drifting-buoy track history before this date (ISO date string)
drift_buoy_drop_history_before = "2021-07-10"
# additional buoy / point-of-interest positions to be plotted
# The ones found in the downloaded data will be shown anyway.
added_stations = [
    {"kind": "buoy", "name": "Pirata Buoy", "lat": 20.0, "lon": -38.0},
    {"kind": "buoy", "name": "Pirata Buoy", "lat": 15.0, "lon": -38.0},
    {"kind": "buoy", "name": "Pirata Buoy", "lat": 21.0, "lon": -23.0},
    {"kind": "buoy", "name": "Pirata Buoy", "lat": 12.0, "lon": -23.0},
    {"kind": "buoy", "name": "Pirata Buoy", "lat": -6.0, "lon": -10.0},
    {"kind": "buoy", "name": "Pirata Buoy", "lat": -10.0 ,"lon": -10.0},
    {"kind": "poi", "name": "Seamount Annan", "lat": 9.25, "lon": -21.333},
    {"kind": "poi", "name": "Seamount Carter", "lat": 9.0, "lon": -20.33}
]
# data files ("tmp_*" files are written by the upstream loading notebooks)
GFS_zarr_store = "tmp_GFS.zarr"
slab_zarr_store = "tmp_slab.zarr"
buoy_file_name = "tmp_buoy_data"
buoy_positions_file = "tmp_buoy_positions.csv"
drift_buoy_history_file = "tmp_buoy_history.csv"
mimoc_mld_file = "tmp_mimoc_mld.nc"
platforms_file = "data/platforms.csv"
gliders_file = "data/gliders.csv"
# dask specifics: single worker, 2 threads, 6 GB memory limit
dask_kwargs = {"n_workers": 1, "threads_per_worker": 2, "memory_limit": 6e9}
Before doing any calculations, we'll need to import a few modules. We'll also start a Dask cluster for parallel execution.
# dask
from dask.distributed import Client
# plotting
from bokeh.models.formatters import DatetimeTickFormatter
import cartopy.crs as ccrs
import cmocean
import geoviews as gv
import holoviews as hv
import hvplot.xarray, hvplot.pandas
# numerics
import numpy as np
import pandas as pd
import xarray as xr
# aux
from functools import reduce
from operator import add
import requests
# create Dask cluster for parallel execution (configured by dask_kwargs above)
client = Client(**dask_kwargs)
# last expression in the cell: shows the client repr / dashboard link in the notebook
client
# Research-vessel positions, restricted to the selected platforms and
# normalized to [0, 360) longitudes with harmonized column names.
platform_positions = (
    pd.read_csv(platforms_file, index_col=[0, ])
    .loc[selected_platforms]
    .rename(columns={"longitude": "lon", "latitude": "lat"})
)
platform_positions["lon"] = platform_positions["lon"] % 360.0
platform_positions["kind"] = "vessel"
platform_positions["name"] = platform_positions["platform"].apply(lambda s: f"R/V {s}")
# display in the notebook
platform_positions
# Glider positions for the selected gliders, labelled "Glider <id>",
# with longitudes in [0, 360) and harmonized column names.
glider_positions = (
    pd.read_csv(gliders_file, index_col=[0, ])
    .loc[selected_glider]
    .rename(columns={"longitude": "lon", "latitude": "lat"})
)
glider_positions["lon"] = glider_positions["lon"] % 360.0
glider_positions["kind"] = "glider"
glider_positions["name"] = glider_positions["platform"].apply(lambda s: f"Glider {s}")
# display in the notebook
glider_positions
# Moored (PIRATA) buoy positions from the downloaded position file,
# restricted to the plotted region.
buoy_positions = pd.read_csv(buoy_positions_file)
# BUGFIX: the original used `('...lat...') and ('...lon...')`, which is
# Python's `and` on two non-empty strings and evaluates to the *second*
# string only -- the latitude condition was silently dropped. Combine
# both conditions into one query expression, and handle a longitude
# window that wraps across 360/0 (lon_min > lon_max) the same way as the
# all_positions filter further down.
if lon_min < lon_max:
    buoy_positions = buoy_positions.query(
        "@lat_min <= lat <= @lat_max and @lon_min <= lon <= @lon_max"
    )
else:
    buoy_positions = buoy_positions.query(
        "@lat_min <= lat <= @lat_max and (lon >= @lon_min or lon <= @lon_max)"
    )
buoy_positions["kind"] = "buoy"
buoy_positions["name"] = "Pirata Buoy"
buoy_positions.head(3)
def get_drifting_buoy_info(buoy: int):
    """Fetch the current status of a drifting buoy from the GEOMAR realtime feed.

    Parameters
    ----------
    buoy : int
        Buoy ID.

    Returns
    -------
    dict
        "name": Short buoy id ('db' plus the last four digits of the ID).
        "lon": Longitude.
        "lat": Latitude.
        "time": Time stamp of position.
    """
    url = f"https://data.geomar.de/realtime/data/project/{buoy}/{buoy}_pos.json"
    # the endpoint returns a [time, lat, lon] triple
    time_stamp, latitude, longitude = requests.get(url).json()
    short_name = 'db' + str(buoy)[-4:]
    return {
        "name": short_name,
        "time": time_stamp,
        "lat": latitude,
        "lon": longitude,
    }
def get_all_buoys_df(buoys: list = None):
    """Collect the current positions of drifting buoys into a DataFrame.

    Parameters
    ----------
    buoys : list, optional
        Buoy IDs to query. Defaults to no buoys.
        BUGFIX: the original iterated the default ``None`` directly and
        raised ``TypeError`` when called without arguments; now an empty
        DataFrame is returned instead.

    Returns
    -------
    pandas.DataFrame
        One row per buoy (index = buoy ID) with the keys returned by
        ``get_drifting_buoy_info`` as columns; empty if no IDs are given.
    """
    if buoys is None:
        buoys = []
    return pd.DataFrame(
        {buoy_id: get_drifting_buoy_info(buoy_id) for buoy_id in buoys}
    ).T
# Query the current drifting-buoy positions and round-trip them through a
# CSV file -- presumably to normalize dtypes and keep a local copy; verify
# against downstream consumers of data/d_buoys.csv.
drift_buoy_positions = get_all_buoys_df(selected_buoys)
drift_buoy_positions.to_csv("data/d_buoys.csv", index=False)
drift_buoy_positions = pd.read_csv("data/d_buoys.csv")
# buoy names must be strings for labelling/plotting
drift_buoy_positions["name"] = drift_buoy_positions["name"].astype(str)
drift_buoy_positions["kind"] = "drifting buoy"
drift_buoy_positions.head(3)
# Turn the hand-maintained station list into a DataFrame (re-using the name).
added_stations = pd.DataFrame(added_stations)
added_stations.head(3)
# Stack all position tables into one frame and give it a fresh 0..n-1 index.
position_frames = [
    added_stations,
    buoy_positions,
    drift_buoy_positions,
    platform_positions,
    glider_positions,
]
all_positions = pd.concat(
    [frame.reset_index().drop(columns=["index", ]) for frame in position_frames]
)
# normalize longitudes to [0, 360) and round coordinates for display
all_positions["lon"] %= 360.0
all_positions["lat"] = all_positions["lat"].round(decimals=3)
all_positions["lon"] = all_positions["lon"].round(decimals=3)
all_positions = all_positions.set_index(pd.Series(range(len(all_positions))))
all_positions.head(3)
# Open the preprocessed data stores written by the upstream notebooks.
ds_GFS = xr.open_zarr(GFS_zarr_store)  # GFS atmospheric forecast
ds_slab = xr.open_zarr(slab_zarr_store)  # slab-ocean model output
ds_mld = xr.open_dataset(mimoc_mld_file)  # MIMOC mixed-layer-depth climatology
We'll need the time stamp of the start of the forecasting data.
# Count the non-forecast (analysis) time steps; everything after them is forecast.
start_of_forecast = (~ds_GFS["is_forecast"].astype(bool)).sum().compute().data
# Time stamp of the last analysis step (clamped to index 0 in case every
# step is flagged as forecast).
start_of_forecast = ds_GFS["time"].data[max(0, start_of_forecast-1)]
print(start_of_forecast)
# Length of the "good" (reliable) part of the forecast as a timedelta.
good_forecast_time = np.timedelta64(good_forecast_days, "D")
# Recenter both datasets in longitude (roll by half the zonal grid size)
# so that the region wrapping across the 0/360 meridian can be selected
# with a single slice. The lat slice runs (lat_max, lat_min) -- presumably
# the latitude coordinate is stored descending; TODO confirm.
# BUGFIX: the GFS roll used ds_slab's longitude size; use ds_GFS's own
# size so the half-grid shift is correct even if the two grids differ.
ds_GFS = ds_GFS.roll(lon=(ds_GFS.dims['lon'] // 2)).sel(
    lat=slice(lat_max, lat_min),
    lon=slice(lon_min, lon_max),
)
ds_slab = ds_slab.roll(lon=(ds_slab.dims['lon'] // 2)).sel(
    lat=slice(lat_max, lat_min),
    lon=slice(lon_min, lon_max),
)
# display in the notebook
ds_GFS
ds_slab
# Keep only stations inside the plotted region. A longitude window with
# lon_min > lon_max wraps across the 0/360 meridian; in that case keep
# everything *outside* the complementary interval instead.
in_lat = all_positions["lat"].between(lat_min, lat_max)
if lon_min < lon_max:
    in_lon = all_positions["lon"].between(lon_min, lon_max)
else:
    in_lon = ~all_positions["lon"].between(
        min(lon_min, lon_max), max(lon_min, lon_max)
    )
all_positions = all_positions[in_lat & in_lon]
# display in the notebook
all_positions
# Drifting-buoy track history, indexed by time stamp and truncated so it
# starts at the configured cut-off date (earlier history is dropped).
db_history = pd.read_csv(drift_buoy_history_file, index_col="Time")
db_history = db_history.loc[drift_buoy_drop_history_before:]
# display in the notebook
db_history
# Calendar years covered by the slab-model time axis.
years_data = list(ds_slab.time.groupby("time.year").groups.keys())
years_min = min(years_data)
# BUGFIX: the maximum year was computed with min(), which made
# duration_years_data wrong (too small) for any multi-year dataset.
years_max = max(years_data)
duration_years_data = years_max - years_min + 1
# pad by one year on each side so time interpolation near the edges
# of the data period has climatology values to work with
years_min -= 1
duration_years_data += 2
def _month_starts(first_month, n_months):
    """Month-start time stamps, beginning `first_month` months after January of `years_min`."""
    return xr.DataArray(
        [
            np.datetime64(f"{years_min + m // 12:04d}-{(m % 12) + 1:02d}-01")
            for m in range(first_month, first_month + n_months)
        ],
        dims=("time", )
    )

# Start of every month in the padded data period, and the start of the
# respective following month.
mld_time_coord_lower = _month_starts(0, duration_years_data * 12)
mld_time_coord_upper = _month_starts(1, duration_years_data * 12)
# Mid-month time stamps: halfway between consecutive month starts.
mld_time_coord = (
    mld_time_coord_lower
    + (mld_time_coord_upper - mld_time_coord_lower) / 2.0
)
display(mld_time_coord)
# Tile the monthly MLD climatology over all (padded) years and put it on
# the mid-month time axis constructed above.
mld_expand = xr.concat(
    [ds_mld] * duration_years_data,
    dim="month"
).rename({"month": "time"})
mld_expand.coords["time"] = mld_time_coord
mld_expand
# Interpolate the climatology onto the slab model's time axis and pick
# the nearest climatology grid point for every slab lat/lon.
mld_slab = mld_expand.interp_like(ds_slab.coords["time"]).sel(
    lat=ds_slab.coords["lat"],
    lon=ds_slab.coords["lon"],
    method="nearest",
)
mld_slab = mld_slab.compute()
display(mld_slab)
# Rescale the slab currents from the model's fixed layer depth H to the
# local climatological mixed-layer depth.
depth_ratio = ds_slab.attrs["slab_model_H"] / mld_slab.mixed_layer_depth
for current_var in ("u_slab", "v_slab", "umag_slab"):
    ds_slab[current_var] *= depth_ratio
We'll plot the time-maximum of near-inertial speed for the good forecast period and for the whole forecast period.
First, we construct the plot.
# Map each station kind to a distinct plot marker.
marker_mapping = {
    "buoy": "circle",
    "drifting buoy": "diamond",
    "vessel": "triangle",
    "poi": "asterisk",
    "glider": "cross",
}
# Marker dimension for hvplot/holoviews: looks up the "kind" column,
# falling back to a diamond for unexpected kinds.
markers = hv.dim("kind").categorize(marker_mapping, default="diamond")
# Time-maximum near-inertial speed over the good and over the whole
# forecast period.
slab_umag_good_forecast_max = ds_slab["umag_slab"].sel(
    time=slice(start_of_forecast, start_of_forecast + good_forecast_time)
).max("time")
slab_umag_whole_forecast_max = ds_slab["umag_slab"].sel(
    time=slice(start_of_forecast, None)
).max("time")


def _to_pm180(lon):
    """Map longitudes from [0, 360) to [-180, 180)."""
    return ((lon + 180) % 360) - 180


# Recenter longitudes for plotting with a PlateCarree projection.
slab_umag_good_forecast_max = slab_umag_good_forecast_max.assign_coords(
    {"lon": _to_pm180(slab_umag_good_forecast_max.lon)}
)
slab_umag_whole_forecast_max = slab_umag_whole_forecast_max.assign_coords(
    {"lon": _to_pm180(slab_umag_whole_forecast_max.lon)}
)
all_positions['lon'] = _to_pm180(all_positions.lon)
# Map panels: time-max near-inertial speed for the good and for the whole
# forecast period, each overlaid with mean mixed-layer-depth contours,
# the station markers, the drifting-buoy track history, and a lat/lon grid.
near_inertial_max_plots = (
    (
        slab_umag_good_forecast_max.hvplot(
            x="lon", y="lat", z="umag_slab",
            clim=(0, 1.0),
            cmap=cmocean.cm.speed,
            frame_width=800,
            hover=False,
            geo=True, coastline=True,
            crs=ccrs.PlateCarree(), projection=ccrs.PlateCarree(),
            title="Near-inertial speed max [m/s] and mixed-layer-depth [m], good forecast period"
        )
        + slab_umag_whole_forecast_max.hvplot(
            x="lon", y="lat", z="umag_slab",
            clim=(0, 1.0),
            cmap=cmocean.cm.speed,
            frame_width=800,
            hover=False,
            geo=True, coastline=True,
            crs=ccrs.PlateCarree(), projection=ccrs.PlateCarree(),
            title="Near-inertial speed max [m/s] and mixed-layer-depth [m], whole forecast period"
        )
    ) * mld_slab.mean("time").mixed_layer_depth.hvplot.contour(
        x="lon", y="lat", geo=True, cmap="gray", hover=True,
        levels=list(range(0, 50, 10)) + list(range(60, 120, 20)), line_width=1.5, alpha=0.5
    ) * all_positions.hvplot.points(
        y="lat", x="lon", geo=True, coastline=True,
        marker=markers,
        fill_color=None, line_color="black",
        line_width=2, size=70,
        hover=True, hover_cols=["kind", "name", "leg", "lat", "lon", "time"]
    ) * db_history.hvplot.line(
        x="Longitude", y="Latitude", geo=True,
        # BUGFIX: this keyword was misspelled "line_widht", so the
        # intended line width was never applied to the buoy track.
        color="gray", line_width=1.5, alpha=0.7,
        hover=True, hover_cols=["Time", "Latitude", "Longitude"]
    ) * gv.feature.grid()
).cols(1)
display(near_inertial_max_plots)
# Collect one panel pair per station here.
time_series_plots = []
# Shared x-axis tick formatting for the time-series panels.
time_formatter = DatetimeTickFormatter(
    months='%b %Y', days='%b %d'
)
# Shaded background spans: gray = good forecast period, pink = the
# remaining (less reliable) forecast.
forecast_spans = (
    hv.VSpan(
        start_of_forecast, start_of_forecast + good_forecast_time
    ).opts(padding=0, color='lightgray')
    * hv.VSpan(
        start_of_forecast + good_forecast_time, None
    ).opts(padding=0, color='pink')
)
# Recenter longitudes to [-180, 180) so they match the recentered
# station positions used for selection below.
mld_slab['lon'] = (((mld_slab.lon + 180) % 360) - 180)
ds_slab['lon'] = (((ds_slab.lon + 180) % 360) - 180)
# BUGFIX: ds_GFS's longitudes were recentered from ds_slab's coordinate
# (copy-paste error), which assigns wrong values whenever the two grids
# differ; use ds_GFS's own longitude coordinate.
ds_GFS['lon'] = (((ds_GFS.lon + 180) % 360) - 180)
# One figure per station: near-inertial currents (top panel) and 20-m
# winds (bottom panel), with the forecast periods shaded via forecast_spans.
for lat, lon, name in zip(all_positions["lat"], all_positions["lon"], all_positions["name"]):
    # climatological mixed-layer depth at the nearest grid point (time mean)
    local_mld = mld_slab.mixed_layer_depth.sel(lat=lat, lon=lon, method='nearest').mean('time').data
    name = f"{name}: {lat}N {lon}E, MLD={local_mld:.0f}m"
    # slab-model time series at the nearest grid point, plus the GFS 20-m winds
    all_pos_ds = ds_slab.sel(lat=lat, lon=lon, method="nearest")
    all_pos_ds["U20"] = ds_GFS["U20"].sel(lat=lat, lon=lon, method="nearest")
    all_pos_ds["V20"] = ds_GFS["V20"].sel(lat=lat, lon=lon, method="nearest")
    # skip stations whose slab speed is all-NaN (presumably points outside
    # the valid model domain, e.g. on land -- TODO confirm)
    if (all_pos_ds["umag_slab"].max("time").isnull().data.compute()):
        continue
    time_series_plots.append(
        (
            # top panel: near-inertial currents (no x axis; shares time with bottom)
            (
                forecast_spans.redim.label(y="u_slab")
                * all_pos_ds["u_slab"].hvplot.line(label="zonal near-inertial current")
                * all_pos_ds["v_slab"].hvplot.line(label="meridional near-inertial current")
                * all_pos_ds["umag_slab"].hvplot.line(label="near-inertial speed")
            ).options(
                width=800, height=160, show_grid=True,
                xaxis=None,
                legend_cols=False, legend_position='right',
                ylabel="current [m/s]", title=name
            )
            # bottom panel: 20-m winds with the formatted time axis
            + (
                forecast_spans.redim.label(y="U20")
                * all_pos_ds["U20"].hvplot.line(label="zonal wind (20m)")
                * all_pos_ds["V20"].hvplot.line(label="meridional wind (20m)")
            ).options(
                width=800, height=160, show_grid=True,
                xformatter=time_formatter,
                legend_cols=False, legend_position='right',
                ylabel="wind [m/s]", xlabel=""
            )
        )
    )
# stack all station figures into a single column
time_series_plots = reduce(add, time_series_plots).cols(1)
display(time_series_plots)
To get a feeling for the atmospheric conditions, we'll plot sea-level pressure anomalies every 12 hours for 3 days before and throughout the whole forecast period.
Anomalies are calculated relative to the whole data period (usually 30+14 days).
# Sea-level pressure anomaly relative to the mean over the whole data period.
SLP = ds_GFS["SLP"].compute()
SLP_mean = SLP.mean("time")
SLP_anomaly = SLP - SLP_mean
# One map every 12 hours ...
plot_every = np.timedelta64(12, "h")
# ... from 3 days (6 steps) before the forecast start through the last
# available time stamp.
last_time = SLP_anomaly.coords["time"].max("time")
max_iter = ((last_time - start_of_forecast) / plot_every).item() // 1 + 1
plot_times = [
    start_of_forecast + step * plot_every
    for step in range(-6, int(max_iter))
]
# One SLP-anomaly map per plot time, overlaid with station markers and a grid.
plots = []
for plot_time in plot_times:
    title = f"SLP anomaly [hPa], {pd.Timestamp(plot_time).strftime('%Y-%m-%d %H:%M:%S UTC')}"
    # annotate the forecast lead time (hours) for times after the analysis
    if plot_time > start_of_forecast:
        title += f"\t(forecast + {(plot_time - start_of_forecast) / np.timedelta64(1, 'h')}h)"
    # best effort: if a single map fails (e.g. a missing time step), report
    # the problem and continue with the remaining times instead of aborting
    try:
        plots.append(
            (
                SLP_anomaly.sel(time=plot_time, method="nearest").compute().hvplot(
                    clim=(-10, 10),
                    cmap=cmocean.cm.delta,
                    frame_width=800,
                    geo=True, coastline=True,
                    crs=ccrs.PlateCarree(), projection=ccrs.PlateCarree(),
                    hover=False
                )
                * all_positions.hvplot.points(
                    y="lat", x="lon", geo=True, coastline=True,
                    marker=markers,
                    fill_color=None, line_color="black",
                    line_width=2, size=70,
                    hover=True, hover_cols=["kind", "name", "leg", "lat", "lon", "time"]
                )
                * gv.feature.grid()
            ).opts(
                title=title,
                show_grid=True
            )
        )
    except Exception as e:
        print(f"for {plot_time} I got: {e}")
# stack all maps into a single column
slp_plot = reduce(add, plots).cols(1)
display(slp_plot)